今天一樣是 Supporting PMUs on RISC-V platforms
相關的內容,先來簡單回顧昨天的簡介:
導致了 perf stat 堪用,而 perf record 無法使用的狀況。
接下來,一樣先就文件本身來研究,再來記錄 perf stat 和 perf record 的區別。
pmu 初始化 (Initialization)
:
riscv_pmu 是 RISC-V 平台上 pmu 的一個實例 (instance),預設指向 riscv_base_pmu 這個最基礎的 (baseline) pmu 實作;不同的實作者可以根據自己的需求來擴充這個資料結構。
/* ${linux}/arch/riscv/kernel/perf_event.c */
/* PMU description currently in use; assigned by init_hw_perf_events(). */
static const struct riscv_pmu *riscv_pmu __read_mostly;
/*
 * Baseline PMU description (the PMU's internal data structure).
 * init_hw_perf_events() uses it unless a device-tree match selects another.
 */
static const struct riscv_pmu riscv_base_pmu = {
	.pmu			= &min_pmu,

	/* Generic hardware events. */
	.hw_events		= riscv_hw_event_map,
	.max_events		= ARRAY_SIZE(riscv_hw_event_map),
	.map_hw_event		= riscv_map_hw_event,

	/* Cache events. */
	.cache_events		= &riscv_cache_event_map,
	.map_cache_event	= riscv_map_cache_event,

	/* Counters. */
	.num_counters		= RISCV_BASE_COUNTERS + 0,
	.counter_width		= 63,

	/* Interrupt: an irq of -1 means this PMU has no IRQ. */
	.irq			= -1,
	.handle_irq		= &riscv_base_pmu_handle_irq,
};
/*
 * Pick the PMU description and register it with the perf core.
 *
 * The baseline PMU is the default.  If a device-tree node of type "pmu"
 * exists and matches an entry in riscv_pmu_of_ids, that entry's data is
 * used instead.
 *
 * Return: the result of perf_pmu_register(), so a failed registration is
 * reported to the initcall machinery instead of being silently dropped.
 */
static int __init init_hw_perf_events(void)
{
	struct device_node *node = of_find_node_by_type(NULL, "pmu");
	const struct of_device_id *of_id;

	/* Default to riscv_base_pmu. */
	riscv_pmu = &riscv_base_pmu;

	if (node) {
		/* Look up the pmu node from the device tree in the match table. */
		of_id = of_match_node(riscv_pmu_of_ids, node);
		if (of_id)
			riscv_pmu = of_id->data;
		/* Drop the reference taken by of_find_node_by_type(). */
		of_node_put(node);
	}

	return perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
}
arch_initcall(init_hw_perf_events); // the kernel runs this function when it initializes arch code during initcalls
pmu 事件初始化 (Event Initialization)
+ 使用 perf 時,perf 會執行 perf_event_open 這個系統呼叫 (system call),接下來 kernel 就會呼叫 pmu 裡的 event_init 這個 member function。
+ 目前 RISC-V 僅支援 cycle、instruction count 這兩項 event
/* ${linux}/arch/riscv/kernel/perf_event.c */
/*
 * Table indexed by generic perf hardware event id.  Only the cycle and
 * instruction entries carry a counter id; every other entry listed here is
 * RISCV_OP_UNSUPP.
 */
static const int riscv_hw_event_map[] = { // the baseline only supports counting cycles and instruction count
[PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
[PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
[PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
};
/*
 * Cache event table indexed by [cache type][operation][result].
 * Every entry visible in this excerpt is RISCV_OP_UNSUPP; the "..." line
 * near the end is the article's elision of the remaining entries.
 */
static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = { // L1 data cache
[C(OP_READ)] = { // READ operation
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
...
};
/*
 * Event initialization: translate the event's type and config into a
 * PMU-specific code through the active riscv_pmu.
 * Excerpt only: each "..." line marks code the article left out.
 */
static int riscv_event_init(struct perf_event *event)
{
...
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
code = riscv_pmu->map_hw_event(attr->config); // init a hardware-related event
break;
case PERF_TYPE_HW_CACHE:
code = riscv_pmu->map_cache_event(attr->config);
break;
case PERF_TYPE_RAW:
/* Raw events are rejected. */
return -EOPNOTSUPP;
default:
return -ENOENT;
}
/* The destroy callback is installed before the mapping result is checked, */
/* so the failure path below invokes riscv_event_destroy itself. */
event->destroy = riscv_event_destroy;
if (code < 0) {
event->destroy(event);
return code;
}
...
}
/*
 * Return the active PMU's hw_events table entry for @config.
 * The "..." line marks code the article left out.
 */
static int riscv_map_hw_event(u64 config) // the actual init (mapping) step
{
...
return riscv_pmu->hw_events[config];
}
中斷 (Interrupt)
reserve_pmc_hardware
,將這個 service routine 變成 globally 可存取的。
/*
 * Request the PMU interrupt line, if the active PMU has one.
 *
 * Nothing is requested when riscv_pmu->irq is negative or when no handler
 * is set.  The whole operation runs under pmc_reserve_mutex.
 *
 * Return: 0 when no IRQ is requested, otherwise the result of request_irq().
 */
static int reserve_pmc_hardware(void)
{
	int ret = 0;

	mutex_lock(&pmc_reserve_mutex);

	/* No IRQ line or no handler: there is nothing to request. */
	if (riscv_pmu->irq < 0 || !riscv_pmu->handle_irq)
		goto unlock;

	ret = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
			  IRQF_PERCPU, "riscv-base-perf", NULL);

unlock:
	mutex_unlock(&pmc_reserve_mutex);
	return ret;
}
存取計數器 (Reading/Writing Counters)
pmu->start
時,要將 counter 設置成一個適當的數值,並且等待 overflow 發生;另一個則是,在 overflow 的 handler 中,把 counter 設回一開始那個適當的數值。
/*
 * Read the current value of counter @idx.
 *
 * Only RISCV_PMU_CYCLE and RISCV_PMU_INSTRET are backed by a CSR read here.
 * Any other index returns -EINVAL converted to u64, and additionally
 * triggers a one-time warning when it lies outside [0, RISCV_MAX_COUNTERS).
 */
static inline u64 read_counter(int idx)
{
	u64 val = 0;

	switch (idx) {
	case RISCV_PMU_CYCLE:
		val = csr_read(CSR_CYCLE);
		break;
	case RISCV_PMU_INSTRET:
		val = csr_read(CSR_INSTRET);
		break;
	default:
		/*
		 * Assuming RISCV_MAX_COUNTERS is the number of counters, the
		 * valid indices end at RISCV_MAX_COUNTERS - 1, so the upper
		 * bound check must be >= rather than >.
		 */
		WARN_ON_ONCE(idx < 0 || idx >= RISCV_MAX_COUNTERS);
		return -EINVAL;
	}

	return val;
}
/* Placeholder: @idx and @value are unused; the body only emits a one-time warning. */
static inline void write_counter(int idx, u64 value) // writing counters is currently not supported in S-mode
{
/* currently not supported */
WARN_ON_ONCE(1);
}
add()/del()/start()/stop()
今天簡單回顧了昨天提到的部分,然後簡單地把文件下半部分記錄完成 (缺少了 perf 的部分,明天再來補囉)。明天我們來看看新的 extension 長什麼樣子,以及 Alan 將 Andes 的解決方案提到社群、和大家討論的過程!明天見!